/**
 * @file 故事抓取
 * @module music/service/login
 * @author 月落 <yueluo.yang@qq.com>
 */

/**
 * @requires wx-server-sdk - 云函数库
 * @requires axios - 网络请求库
 * @requires cheerio - 抓取页面模块
 * @requires libs/tools - 工具方法
 * @requires config/config - 链接配置
 */
const cloud = require('wx-server-sdk'),
      axios = require('axios').default,
      cheerio = require('cheerio'),
      tools = require('../libs/tools'),
      config = require('../config/config');

// Initialise the cloud function SDK
cloud.init();
// Database instance, bound to the environment named in config.env
const db = cloud.database({
  env: config.env
});

/**
 * @description Request a page and hand back the body of the response
 * @param {string} url - address to request
 * @returns {Promise<string>} resolves with the `data` field of the axios response
 */
const getHtml = async (url) => {
  const { data } = await axios.get(url);
  return data;
};

/**
 * @description Extract the story entries listed on one index page
 * @param {string} htmlStr - HTML source of the listing page
 * @returns {Promise<object[]>} one `{ title, url, time, coverImgUrl }` record
 *  per `.content section` element, in document order
 */
const getList = async (htmlStr) => {
  const $ = cheerio.load(htmlStr);

  return $('.content section').toArray().map((node) => {
    const section = $(node);
    // The heading link carries both the story title and its detail-page URL
    const link = section.find('h2 > a');

    return {
      title: link.attr('title'),
      url: link.attr('href'),
      time: section.find('.postmeta time').text(),
      coverImgUrl: section.find('.thumbnail img').attr('src')
    };
  });
};

/**
 * @description Group the nodes of a story page into chapters. Every `h2`
 *  starts a new chapter; each following `p` is appended to the most recent
 *  chapter. `p` nodes that appear before the first `h2`, and all other tags,
 *  are ignored.
 * @param {object} $ - cheerio instance used to wrap the nodes
 * @param {array} nodeList - nodes to scan, in document order
 * @returns {array} one `{ id, title, text }` object per chapter; `title` is the
 *  segment after the first '-' of the heading (or the whole heading when it
 *  contains no '-'), `text` is the chapter's paragraphs joined together
 */
const getStoryDetail = ($, nodeList) => {
  const chapters = [];

  nodeList.forEach(item => {
    const node = $(item);
    const tagName = node.prop('nodeName').toLowerCase();

    if (tagName === 'h2') {
      chapters.push({
        title: node.text(),
        content: []
      });
      return;
    }

    // Paragraphs always belong to the chapter opened by the latest heading
    if (tagName === 'p' && chapters.length) {
      chapters[chapters.length - 1].content.push(node.text());
    }
  });

  return chapters.map(({ title, content }) => {
    const parts = title.split('-');

    return {
      id: tools.uuid(),
      // Without a '-' there is no prefix to strip; keep the heading instead of
      // emitting an undefined title
      title: parts.length > 1 ? parts[1] : title,
      text: content.join('')
    };
  });
};

/**
 * @description Fetch and parse every story page in the list
 * @param {array} urlList - story page URLs
 * @returns {Promise<array>} parsed story details, where entry `i` is the
 *  result for `urlList[i]`
 */
const getStories = async (urlList) => {
  const result = [];

  // Pages are fetched one at a time and in list order, so result[i] lines up
  // with urlList[i]; callers pair the two arrays by index.
  for (const url of urlList) {
    const html = await getHtml(url);
    const $ = cheerio.load(html);
    result.push(getStoryDetail($, $('.entry').children().toArray()));
  }

  return result;
};

/**
 * @description Crawl the listing pages under `url` and attach the parsed
 *  content of each story to its listing entry
 * @param {string} url - base listing path; the page number is appended as `${url}/${page}`
 * @returns {Promise<array>} listing entries from all crawled pages, each
 *  without its `url` field and with a `content` field added
 */
const getStoriesData = async (url) => {
  const maxPage = 3; // crawl 3 pages of data

  let result = [];

  // Pages 1..maxPage inclusive
  for (let page = 1; page <= maxPage; page++) {
    const htmlStr = await getHtml(`${url}/${page}`);
    const stories = await getList(htmlStr);
    const content = await getStories(stories.map(item => item.url));

    const merged = stories.map((item, index) => {
      // Work on a copy so the list entries themselves are left untouched
      const story = Object.assign({}, item, { content: content[index] });
      delete story.url;
      return story;
    });

    result = result.concat(merged);
  }

  console.log('[crawler]：get data success', result.length);

  return result;
};

/**
 * @description Insert every story into the `${field}_stories` collection,
 *  one record at a time, logging the `errMsg` returned for each insert
 * @param {string} field - story type, used as the collection name prefix
 * @param {array} stories - story records to store
 * @returns {Promise<void>}
 */
const insertStoriesData = async (field, stories) => {
  const collection = db.collection(`${field}_stories`);

  // Iterate the elements directly so the first story is included and nothing
  // past the end of the array is inserted
  for (const story of stories) {
    const { errMsg } = await collection.add({
      data: story
    });
    console.log(errMsg);
  }

  console.log('[crawler]：insert data success.');
};

/**
 * @description 故事爬取
 * @param {string} field - 故事类型
 *  sleep：睡前故事  fairy：童话故事  warm：暖心故事  love：爱情故事
 * @returns {object} 
 */
module.exports = async ({ field }) => {
  try {
    // 1. 获取数据
    const data = await getStoriesData(config[field]);
    // 2. 保存数据到数据库
    await insertStoriesData(field, data); 
    // 3. 返回状态码
    return {
      code: 0,
      message: 'crawler data success.'
    }
  } catch (error) {
    console.log('[crawler]：', error);
    return {
      code: -1,
      message: 'crawler data failed.'
    }
  }
}